# ---- Neural-network approximation of the power function y = x^n ----
library(neuralnet)

# Seed BEFORE drawing the training inputs so that the data, and therefore the
# whole experiment, is reproducible from run to run.
set.seed(1234)
rand_data <- runif(1000, 0, 20)

# To take the exponent from the user instead of hard-coding it:
# n <- as.integer(readline(prompt = "Enter the power parameter : "))
n <- 2 # exponent of the target function; this script uses n = 2

# Training set: inputs drawn from [0, 20] together with their n-th power.
pwr_df <- data.frame(rand_data, pwr_data = rand_data^n)
plot(rand_data, pwr_df$pwr_data)

# Re-seed right before fitting so the fit does not depend on how many random
# numbers were consumed while building the data.
set.seed(1234)
net.pwr <- neuralnet(pwr_data ~ rand_data, pwr_df,
                     hidden = 50, threshold = 0.1, stepmax = 1e06)
# print(net.pwr)

# Evaluation grid runs to 30, i.e. past the [0, 20] range used for training.
test_data <- seq(0, 30, 0.1)
test_data_pwr <- test_data^n
pred_pwr <- compute(net.pwr, test_data)$net.result

# Predicted vs. actual values with the identity line for reference. The axis
# limits follow the largest actual value (900 for n = 2) instead of being
# hard-coded, so the plot stays usable when n changes.
axis_max <- max(test_data_pwr)
plot(pred_pwr, test_data_pwr, xlim = c(0, axis_max), ylim = c(0, axis_max))
abline(0, 1, col = "red", lty = 2)
# The first series is drawn as points and the second as a dashed line, so the
# legend uses a point symbol and a line type respectively.
legend("bottomright", c("Pred vs. Actual Power", "Pred=Actual Line"),
       cex = 0.8, pch = c(1, NA), lty = c(NA, 2), lwd = c(NA, 2),
       col = c("black", "red"))
compare_df <- data.frame(pred_pwr, test_data_pwr)

# Actual curve (points) with the network's prediction overlaid (dashed line).
plot(test_data, test_data_pwr)
lines(test_data, pred_pwr, col = "red", lty = 2)
legend("bottomright", c("Actual Power", "Predicted Power"),
       pch = c(1, NA), lty = c(NA, 2), lwd = c(NA, 2),
       col = c("black", "red"))
#### We can observe in the above plot that the actual power value is very close to the predicted power, although only up to some extent, after which it breaks. After trying it with different numbers of hidden layers, this seems to be the better solution.
# Read the ALS training set from the working directory and preview its first
# rows. Uncomment the next line to point R at the folder holding the CSV.
# setwd("~/Downloads")
als_train <- read.csv(file = "ALS_TrainingData_2223.csv")
head(x = als_train)
## ID Age_mean Albumin_max Albumin_median Albumin_min Albumin_range
## 1 1 65 57 40.5 38 0.066202091
## 2 2 48 45 41.0 39 0.010452962
## 3 3 38 50 47.0 45 0.008928571
## 4 4 63 47 44.0 41 0.012111135
## 5 5 63 47 45.5 42 0.008291874
## 6 6 36 51 47.0 46 0.009057971
## ALSFRS_slope ALSFRS_Total_max ALSFRS_Total_median ALSFRS_Total_min
## 1 -0.965608466 30 28.0 22
## 2 -0.921717172 37 33.0 21
## 3 -0.914786967 24 14.0 10
## 4 -0.598360656 30 29.0 24
## 5 -0.444038929 32 27.5 20
## 6 -0.118352789 37 34.5 27
## ALSFRS_Total_range ALT.SGPT._max ALT.SGPT._median ALT.SGPT._min
## 1 0.021164021 24 22.0 18
## 2 0.028725314 25 13.0 8
## 3 0.025000000 25 20.0 14
## 4 0.014962594 62 60.0 41
## 5 0.020373514 38 26.5 22
## 6 0.018115942 34 23.0 18
## ALT.SGPT._range AST.SGOT._max AST.SGOT._median AST.SGOT._min
## 1 0.020905923 31 27.5 23
## 2 0.029616725 31 17.0 14
## 3 0.019642857 24 19.0 18
## 4 0.052369077 46 40.0 33
## 5 0.026533997 35 26.5 20
## 6 0.028985507 31 26.0 21
## AST.SGOT._range Bicarbonate_max Bicarbonate_median Bicarbonate_min
## 1 0.027874564 30 28 25
## 2 0.029616725 32 28 25
## 3 0.010714286 35 29 24
## 4 0.032418953 23 20 20
## 5 0.024875622 32 28 23
## 6 0.018115942 29 26 22
## Bicarbonate_range Blood.Urea.Nitrogen..BUN._max
## 1 0.017421603 8.0322
## 2 0.012195122 8.3973
## 3 0.019642857 5.4765
## 4 0.007481297 8.0322
## 5 0.014925373 5.1114
## 6 0.012681159 6.5718
## Blood.Urea.Nitrogen..BUN._median Blood.Urea.Nitrogen..BUN._min
## 1 7.11945 6.5718
## 2 4.74630 4.0161
## 3 4.38120 3.6510
## 4 8.03220 6.5718
## 5 4.19865 3.6510
## 6 5.11140 4.0161
## Blood.Urea.Nitrogen..BUN._range bp_diastolic_max bp_diastolic_median
## 1 0.005088502 90 83
## 2 0.007632753 80 78
## 3 0.003259821 86 76
## 4 0.003641895 90 80
## 5 0.002421891 100 80
## 6 0.004629891 84 80
## bp_diastolic_min bp_diastolic_range bp_systolic_max bp_systolic_median
## 1 69 0.055555556 160 139.0
## 2 64 0.028725314 140 132.5
## 3 58 0.050000000 120 110.0
## 4 70 0.049875312 150 130.0
## 5 68 0.053067993 160 130.0
## 6 60 0.043478261 140 115.0
## bp_systolic_min bp_systolic_range Calcium_max Calcium_median Calcium_min
## 1 129 0.082010582 2.49500 2.220550 2.22055
## 2 104 0.064631957 2.32035 2.170650 2.02095
## 3 90 0.053571429 2.47005 2.295400 2.19560
## 4 120 0.074812968 2.47005 2.345300 2.23000
## 5 104 0.092868988 2.42015 2.257975 2.17065
## 6 100 0.072463768 2.39520 2.270450 2.17065
## Calcium_range Chloride_max Chloride_median Chloride_min Chloride_range
## 1 0.000956272 109 108 103 0.020905923
## 2 0.000521603 108 102 100 0.013937282
## 3 0.000490089 108 106 104 0.007142857
## 4 0.000473934 109 107 106 0.007481297
## 5 0.000413765 107 104 100 0.011608624
## 6 0.000406793 110 105 101 0.016304348
## Creatinine_max Creatinine_median Creatinine_min Creatinine_range
## 1 79.56 79.56 70.72 0.030801394
## 2 61.88 53.04 44.20 0.030801394
## 3 88.40 79.56 70.72 0.031571429
## 4 70.72 61.88 53.04 0.044089776
## 5 61.88 48.62 26.52 0.058640133
## 6 106.08 88.40 70.72 0.064057971
## Gender_mean Glucose_max Glucose_median Glucose_min Glucose_range
## 1 1 7.4370 4.4955 4.2180 0.011216028
## 2 1 6.7710 4.9950 4.0515 0.004737805
## 3 2 5.6610 5.1060 4.2180 0.002576786
## 4 2 5.1060 4.7730 4.6620 0.001107232
## 5 1 7.4925 5.7165 5.0505 0.004049751
## 6 2 5.5500 5.1060 4.4400 0.002010870
## hands_max hands_median hands_min hands_range Hematocrit_max
## 1 8 7.5 6 0.005291005 44.6
## 2 8 6.0 6 0.003590664 41.9
## 3 4 1.0 0 0.007142857 49.1
## 4 6 5.5 4 0.004987531 46.3
## 5 8 6.5 3 0.008488964 44.0
## 6 8 7.0 5 0.005434783 46.8
## Hematocrit_median Hematocrit_min Hematocrit_range Hemoglobin_max
## 1 43.15 40.7 0.013588850 156
## 2 39.60 37.7 0.007317073 138
## 3 46.20 44.0 0.009107143 161
## 4 43.00 41.7 0.011471322 154
## 5 42.85 39.5 0.007462687 152
## 6 43.50 41.9 0.008876812 157
## Hemoglobin_median Hemoglobin_min Hemoglobin_range leg_max leg_median
## 1 146.0 143 0.045296167 8 6.5
## 2 132.0 128 0.017421603 8 7.5
## 3 154.0 151 0.017857143 4 3.0
## 4 145.0 144 0.024937656 4 3.5
## 5 146.5 138 0.023217247 2 2.0
## 6 146.0 142 0.027173913 8 8.0
## leg_min leg_range mouth_max mouth_median mouth_min mouth_range
## 1 4 0.010582011 5 3.5 0 0.013227513
## 2 3 0.008976661 9 8.0 4 0.008976661
## 3 2 0.003571429 10 7.0 4 0.010714286
## 4 2 0.004987531 12 12.0 12 0.000000000
## 5 0 0.003395586 12 12.0 12 0.000000000
## 6 4 0.007246377 9 8.0 7 0.003623188
## onset_delta_mean onset_site_mean Platelets_max Platelets_median
## 1 -1023 1 172 169.0
## 2 -341 1 286 264.0
## 3 -1181 1 233 213.0
## 4 -365 2 275 233.0
## 5 -1768 2 313 283.5
## 6 -334 1 220 194.0
## Platelets_min Potassium_max Potassium_median Potassium_min
## 1 152 4.5 4.25 4.0
## 2 230 5.0 4.30 3.9
## 3 167 4.1 4.00 3.9
## 4 204 4.3 4.20 4.0
## 5 268 4.6 3.75 3.5
## 6 178 4.5 4.30 4.2
## Potassium_range pulse_max pulse_median pulse_min pulse_range
## 1 0.001742160 79 68 61 0.047619048
## 2 0.001916376 90 76 64 0.046678636
## 3 0.000357143 82 73 60 0.039285714
## 4 0.000748130 84 72 68 0.039900249
## 5 0.001824212 101 96 74 0.044776119
## 6 0.000543478 88 66 60 0.050724638
## respiratory_max respiratory_median respiratory_min respiratory_range
## 1 4 3 3 0.002645503
## 2 4 4 3 0.001795332
## 3 4 4 4 0.000000000
## 4 3 3 3 0.000000000
## 5 4 4 3 0.001697793
## 6 4 4 3 0.001811594
## Sodium_max Sodium_median Sodium_min Sodium_range SubjectID trunk_max
## 1 148 145.5 143 0.017421603 533 8
## 2 142 138.0 136 0.010452962 649 8
## 3 145 143.0 140 0.008928571 1234 5
## 4 143 139.0 138 0.012468828 2492 5
## 5 143 140.0 138 0.008291874 2956 6
## 6 145 141.0 137 0.014492754 3085 8
## trunk_median trunk_min trunk_range Urine.Ph_max Urine.Ph_median
## 1 7 7 0.002645503 6 6
## 2 7 5 0.005385996 7 5
## 3 0 0 0.008928571 6 5
## 4 5 3 0.004987531 7 6
## 5 4 1 0.008488964 6 5
## 6 8 7 0.001811594 8 6
## Urine.Ph_min
## 1 6
## 2 5
## 3 5
## 4 5
## 5 5
## 6 5
# Show the structure (column names, types, leading values) of the raw data.
str(als_train)
## 'data.frame': 2223 obs. of 101 variables:
## $ ID : int 1 2 3 4 5 6 7 8 9 11 ...
## $ Age_mean : int 65 48 38 63 63 36 55 55 37 72 ...
## $ Albumin_max : num 57 45 50 47 47 51 46 45 48 44 ...
## $ Albumin_median : num 40.5 41 47 44 45.5 47 44 42 46 42 ...
## $ Albumin_min : num 38 39 45 41 42 46 40 38 41 38 ...
## $ Albumin_range : num 0.0662 0.01045 0.00893 0.01211 0.00829 ...
## $ ALSFRS_slope : num -0.966 -0.922 -0.915 -0.598 -0.444 ...
## $ ALSFRS_Total_max : int 30 37 24 30 32 37 34 30 35 28 ...
## $ ALSFRS_Total_median : num 28 33 14 29 27.5 34.5 24 27.5 28.5 25.5 ...
## $ ALSFRS_Total_min : int 22 21 10 24 20 27 10 20 24 23 ...
## $ ALSFRS_Total_range : num 0.0212 0.0287 0.025 0.015 0.0204 ...
## $ ALT.SGPT._max : num 24 25 25 62 38 34 80 38 47 39 ...
## $ ALT.SGPT._median : num 22 13 20 60 26.5 23 46 27 42 20 ...
## $ ALT.SGPT._min : num 18 8 14 41 22 18 19 15 25 11 ...
## $ ALT.SGPT._range : num 0.0209 0.0296 0.0196 0.0524 0.0265 ...
## $ AST.SGOT._max : int 31 31 24 46 35 31 57 26 43 49 ...
## $ AST.SGOT._median : num 27.5 17 19 40 26.5 26 37 25 30 24 ...
## $ AST.SGOT._min : num 23 14 18 33 20 21 22 16 24 17 ...
## $ AST.SGOT._range : num 0.0279 0.0296 0.0107 0.0324 0.0249 ...
## $ Bicarbonate_max : num 30 32 35 23 32 29 32 29 36 32 ...
## $ Bicarbonate_median : num 28 28 29 20 28 26 27.5 28 29 29.5 ...
## $ Bicarbonate_min : num 25 25 24 20 23 22 23 25 20 27 ...
## $ Bicarbonate_range : num 0.01742 0.0122 0.01964 0.00748 0.01493 ...
## $ Blood.Urea.Nitrogen..BUN._max : num 8.03 8.4 5.48 8.03 5.11 ...
## $ Blood.Urea.Nitrogen..BUN._median: num 7.12 4.75 4.38 8.03 4.2 ...
## $ Blood.Urea.Nitrogen..BUN._min : num 6.57 4.02 3.65 6.57 3.65 ...
## $ Blood.Urea.Nitrogen..BUN._range : num 0.00509 0.00763 0.00326 0.00364 0.00242 ...
## $ bp_diastolic_max : int 90 80 86 90 100 84 98 80 90 90 ...
## $ bp_diastolic_median : num 83 78 76 80 80 80 86 74 80 80 ...
## $ bp_diastolic_min : int 69 64 58 70 68 60 80 64 80 70 ...
## $ bp_diastolic_range : num 0.0556 0.0287 0.05 0.0499 0.0531 ...
## $ bp_systolic_max : int 160 140 120 150 160 140 134 134 135 140 ...
## $ bp_systolic_median : num 139 132 110 130 130 ...
## $ bp_systolic_min : int 129 104 90 120 104 100 110 104 115 120 ...
## $ bp_systolic_range : num 0.082 0.0646 0.0536 0.0748 0.0929 ...
## $ Calcium_max : num 2.5 2.32 2.47 2.47 2.42 ...
## $ Calcium_median : num 2.22 2.17 2.3 2.35 2.26 ...
## $ Calcium_min : num 2.22 2.02 2.2 2.23 2.17 ...
## $ Calcium_range : num 0.000956 0.000522 0.00049 0.000474 0.000414 ...
## $ Chloride_max : num 109 108 108 109 107 110 108 107 110 103 ...
## $ Chloride_median : num 108 102 106 107 104 105 104 106 105 99 ...
## $ Chloride_min : num 103 100 104 106 100 101 100 101 101 95 ...
## $ Chloride_range : num 0.02091 0.01394 0.00714 0.00748 0.01161 ...
## $ Creatinine_max : num 79.6 61.9 88.4 70.7 61.9 ...
## $ Creatinine_median : num 79.6 53 79.6 61.9 48.6 ...
## $ Creatinine_min : num 70.7 44.2 70.7 53 26.5 ...
## $ Creatinine_range : num 0.0308 0.0308 0.0316 0.0441 0.0586 ...
## $ Gender_mean : int 1 1 2 2 1 2 2 1 2 1 ...
## $ Glucose_max : num 7.44 6.77 5.66 5.11 7.49 ...
## $ Glucose_median : num 4.5 5 5.11 4.77 5.72 ...
## $ Glucose_min : num 4.22 4.05 4.22 4.66 5.05 ...
## $ Glucose_range : num 0.01122 0.00474 0.00258 0.00111 0.00405 ...
## $ hands_max : int 8 8 4 6 8 8 6 8 6 8 ...
## $ hands_median : num 7.5 6 1 5.5 6.5 7 4 8 1.5 7 ...
## $ hands_min : int 6 6 0 4 3 5 1 5 0 6 ...
## $ hands_range : num 0.00529 0.00359 0.00714 0.00499 0.00849 ...
## $ Hematocrit_max : num 44.6 41.9 49.1 46.3 44 46.8 50.5 45.5 48 42 ...
## $ Hematocrit_median : num 43.1 39.6 46.2 43 42.9 ...
## $ Hematocrit_min : num 40.7 37.7 44 41.7 39.5 41.9 44.1 37.1 45 38 ...
## $ Hematocrit_range : num 0.01359 0.00732 0.00911 0.01147 0.00746 ...
## $ Hemoglobin_max : num 156 138 161 154 152 157 165 152 156 139 ...
## $ Hemoglobin_median : num 146 132 154 145 146 ...
## $ Hemoglobin_min : num 143 128 151 144 138 142 151 122 149 125 ...
## $ Hemoglobin_range : num 0.0453 0.0174 0.0179 0.0249 0.0232 ...
## $ leg_max : int 8 8 4 4 2 8 8 1 8 1 ...
## $ leg_median : num 6.5 7.5 3 3.5 2 8 6 0.5 8 0 ...
## $ leg_min : int 4 3 2 2 0 4 4 0 5 0 ...
## $ leg_range : num 0.01058 0.00898 0.00357 0.00499 0.0034 ...
## $ mouth_max : int 5 9 10 12 12 9 10 12 12 12 ...
## $ mouth_median : num 3.5 8 7 12 12 8 6 12 12 12 ...
## $ mouth_min : int 0 4 4 12 12 7 0 10 12 11 ...
## $ mouth_range : num 0.01323 0.00898 0.01071 0 0 ...
## $ onset_delta_mean : int -1023 -341 -1181 -365 -1768 -334 -268 -763 -440 -1324 ...
## $ onset_site_mean : int 1 1 1 2 2 1 2 2 2 2 ...
## $ Platelets_max : int 172 286 233 275 313 220 245 487 149 378 ...
## $ Platelets_median : num 169 264 213 233 284 ...
## $ Platelets_min : num 152 230 167 204 268 178 191 212 109 281 ...
## $ Potassium_max : num 4.5 5 4.1 4.3 4.6 4.5 4.5 4.7 4.6 4.4 ...
## $ Potassium_median : num 4.25 4.3 4 4.2 3.75 4.3 4.1 4.5 4.3 3.7 ...
## $ Potassium_min : num 4 3.9 3.9 4 3.5 4.2 3.6 4.2 4 3.2 ...
## $ Potassium_range : num 0.001742 0.001916 0.000357 0.000748 0.001824 ...
## $ pulse_max : int 79 90 82 84 101 88 96 100 84 100 ...
## $ pulse_median : num 68 76 73 72 96 66 80 80 68 100 ...
## $ pulse_min : int 61 64 60 68 74 60 66 64 59 80 ...
## $ pulse_range : num 0.0476 0.0467 0.0393 0.0399 0.0448 ...
## $ respiratory_max : int 4 4 4 3 4 4 4 4 4 4 ...
## $ respiratory_median : num 3 4 4 3 4 4 3 4 4 4 ...
## $ respiratory_min : int 3 3 4 3 3 3 2 1 4 4 ...
## $ respiratory_range : num 0.00265 0.0018 0 0 0.0017 ...
## $ Sodium_max : num 148 142 145 143 143 145 145 145 146 147 ...
## $ Sodium_median : num 146 138 143 139 140 ...
## $ Sodium_min : num 143 136 140 138 138 137 136 139 138 132 ...
## $ Sodium_range : num 0.01742 0.01045 0.00893 0.01247 0.00829 ...
## $ SubjectID : int 533 649 1234 2492 2956 3085 3551 3971 4390 4772 ...
## $ trunk_max : int 8 8 5 5 6 8 7 5 6 3 ...
## $ trunk_median : num 7 7 0 5 4 8 5 3 3 3 ...
## $ trunk_min : int 7 5 0 3 1 7 2 2 2 1 ...
## $ trunk_range : num 0.00265 0.00539 0.00893 0.00499 0.00849 ...
## $ Urine.Ph_max : num 6 7 6 7 6 8 9 6 7 7 ...
## [list output truncated]
# Scatter one feature of `data` against ALSFRS_slope, colouring each point by
# Gender_mean, and add a matching legend.
#
# feature:    name of the column plotted on the x axis.
# legend_pos: legend position keyword passed to legend().
# data:       data frame holding `feature`, ALSFRS_slope and Gender_mean.
#
# The legend is built from the same colour vector as the points, so the two
# can no longer drift apart (one of the hand-written legends used "pink"
# while the points were drawn in "blue").
# NOTE(review): the labelling Gender_mean == 1 -> "Male" is carried over from
# the original legends; confirm it against the data dictionary.
plot_slope_by_gender <- function(feature, legend_pos = "topright",
                                 data = als_train) {
  gender_cols <- c(Male = "red", Female = "blue")
  point_cols <- ifelse(data[["Gender_mean"]] == 1,
                       gender_cols[["Male"]], gender_cols[["Female"]])
  plot(data[[feature]], data[["ALSFRS_slope"]],
       xlab = feature, ylab = "ALSFRS_slope", pch = 19, col = point_cols)
  legend(legend_pos, legend = names(gender_cols), pch = 19,
         col = unname(gender_cols), bty = "o", cex = 1.1,
         box.col = "darkgreen")
  invisible(NULL)
}

# One plot per candidate predictor. Hematocrit gets its legend in the
# bottom-left corner, as before.
plot_slope_by_gender("ALT.SGPT._median")
plot_slope_by_gender("AST.SGOT._median")
plot_slope_by_gender("Creatinine_median")
plot_slope_by_gender("Glucose_median")
plot_slope_by_gender("Hematocrit_median", legend_pos = "bottomleft")
plot_slope_by_gender("Platelets_median")
plot_slope_by_gender("Potassium_median")
plot_slope_by_gender("pulse_median")
plot_slope_by_gender("respiratory_median")
# Keep the outcome (ALSFRS_slope) plus ten predictors for clustering. Columns
# are selected by name rather than by position, so a reordered or extended CSV
# raises an "undefined columns" error instead of silently picking different
# variables.
# NOTE(review): the positions used previously (78 and 82) are Potassium_max and
# pulse_max, whereas the exploratory plots look at Potassium_median and
# pulse_median. The selection is kept as it was; confirm which pair is
# intended.
cluster_vars <- c(
  "ALSFRS_slope", "ALT.SGPT._median", "AST.SGOT._median",
  "Creatinine_median", "Gender_mean", "Glucose_median",
  "Hematocrit_median", "Platelets_median", "Potassium_max",
  "pulse_max", "respiratory_median"
)
als_train <- als_train[, cluster_vars]
str(als_train)
## 'data.frame': 2223 obs. of 11 variables:
## $ ALSFRS_slope : num -0.966 -0.922 -0.915 -0.598 -0.444 ...
## $ ALT.SGPT._median : num 22 13 20 60 26.5 23 46 27 42 20 ...
## $ AST.SGOT._median : num 27.5 17 19 40 26.5 26 37 25 30 24 ...
## $ Creatinine_median : num 79.6 53 79.6 61.9 48.6 ...
## $ Gender_mean : int 1 1 2 2 1 2 2 1 2 1 ...
## $ Glucose_median : num 4.5 5 5.11 4.77 5.72 ...
## $ Hematocrit_median : num 43.1 39.6 46.2 43 42.9 ...
## $ Platelets_median : num 169 264 213 233 284 ...
## $ Potassium_max : num 4.5 5 4.1 4.3 4.6 4.5 4.5 4.7 4.6 4.4 ...
## $ pulse_max : int 79 90 82 84 101 88 96 100 84 100 ...
## $ respiratory_median: num 3 4 4 3 4 4 3 4 4 4 ...
# Standardise every column (centre on its mean, divide by its standard
# deviation) so that all variables contribute on a comparable scale.
als_train_scaled <- as.data.frame(scale(als_train))
str(als_train_scaled)
## 'data.frame': 2223 obs. of 11 variables:
## $ ALSFRS_slope : num -0.381 -0.311 -0.3 0.209 0.457 ...
## $ ALT.SGPT._median : num -0.705 -1.281 -0.833 1.731 -0.416 ...
## $ AST.SGOT._median : num -0.164 -1.259 -1.05 1.139 -0.269 ...
## $ Creatinine_median : num 0.828 -0.7 0.828 -0.191 -0.955 ...
## $ Gender_mean : num -1.326 -1.326 0.754 0.754 -1.326 ...
## $ Glucose_median : num -0.797 -0.396 -0.306 -0.574 0.184 ...
## $ Hematocrit_median : num 0.304 0.011 0.555 0.291 0.279 ...
## $ Platelets_median : num -1.326 0.479 -0.49 -0.11 0.85 ...
## $ Potassium_max : num -0.0969 0.2804 -0.3986 -0.2477 -0.0214 ...
## $ pulse_max : num -1.0484 -0.0578 -0.7783 -0.5981 0.9327 ...
## $ respiratory_median: num -0.968 0.665 0.665 -0.968 0.665 ...
library(stats)
# Baseline k-means with 4 clusters. The seed fixes the random choice of the
# starting centres; the last line prints the resulting cluster sizes.
set.seed(321)
als_clusters <- kmeans(x = als_train_scaled, centers = 4)
als_clusters$size
## [1] 322 469 727 705
require(cluster)
## Loading required package: cluster
# Pairwise Euclidean distances on the scaled data, reused by every silhouette
# computation below, followed by the silhouette of the baseline k-means fit.
dis <- dist(x = als_train_scaled, method = "euclidean")
sil <- silhouette(x = als_clusters$cluster, dist = dis)
summary(sil)
## Silhouette of 2223 units in 4 clusters from silhouette.default(x = als_clusters$cluster, dist = dis) :
## Cluster sizes and average silhouette widths:
## 322 469 727 705
## 0.12820155816 0.02249440513 0.17184749584 0.19290026332
## Individual silhouette widths:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.11509768 0.06421337 0.14735258 0.14069212 0.21914407 0.38073137
# Silhouette plot of the baseline k-means solution.
plot(sil, border = NA)

# Grouped bar chart of the cluster centres: one group per cluster, one bar per
# variable.
par(mfrow = c(1, 1), mar = c(4, 4, 4, 2))
myColors <- c("darkblue", "red", "green", "brown", "pink", "purple", "yellow",
              "orange", "black", "grey", "violet")
centers_t <- t(als_clusters$centers) # rows = variables, columns = clusters
bar_cols <- rep_len(myColors, nrow(centers_t))
barplot(centers_t, beside = TRUE, xlab = "cluster",
        ylab = "value", col = bar_cols)
# Take the legend labels from the centre matrix itself. The hand-typed list
# had 10 names for 11 variables (Gender_mean was missing), so every label
# from the fifth on was paired with the wrong colour.
legend("top", ncol = 2, legend = rownames(centers_t), fill = bar_cols)
library(matrixStats)
## Warning: package 'matrixStats' was built under R version 3.4.3
#' k-means++ style seeding of cluster centres
#'
#' Picks the first centre uniformly at random from the rows of `dat`; every
#' further centre is a row drawn with probability proportional to its squared
#' Euclidean distance to the closest centre chosen so far.
#'
#' @param dat Numeric matrix or data frame, one observation per row.
#' @param K Number of centres to return (>= 1).
#' @param seed Seed passed to `set.seed()` before drawing. The default (123)
#'   reproduces the previously hard-wired behaviour, including the side effect
#'   of re-seeding the global RNG. Use `seed = NULL` to leave the RNG alone so
#'   that the caller's own `set.seed()` takes effect.
#' @return A K x ncol(dat) matrix whose rows are the chosen centres.
kpp_init <- function(dat, K, seed = 123) {
  x <- as.matrix(dat)
  n <- nrow(x)
  stopifnot(n >= 1, length(K) == 1, K >= 1)
  if (!is.null(seed)) {
    set.seed(seed)
  }

  centers <- matrix(NA, nrow = K, ncol = ncol(x))
  # Randomly choose a first center
  centers[1, ] <- x[sample(seq_len(n), 1), ]

  # Squared distance from every point to its closest centre so far. It is
  # updated incrementally with the newest centre only, which gives the same
  # values as recomputing the distance to all centres each round.
  min_d2 <- rep(Inf, n)
  # seq_len() keeps the loop empty for K = 1 (2:K would count down to 1).
  for (k in seq_len(K - 1) + 1) {
    d2_new <- rowSums(sweep(x, 2, centers[k - 1, ], "-")^2)
    min_d2 <- pmin(min_d2, d2_new)

    # Draw next center with probability proportional to dist^2
    cumdists <- cumsum(min_d2)
    total <- cumdists[n]
    if (!is.finite(total)) {
      stop("kpp_init: non-finite distances; does 'dat' contain NA/NaN/Inf?",
           call. = FALSE)
    }
    if (total > 0) {
      prop <- runif(1, min = 0, max = total)
      pick <- which(cumdists > prop)[1]
      if (is.na(pick)) {
        # Only reachable through rounding (prop == total): take the last
        # point that still has positive weight.
        pick <- max(which(min_d2 > 0))
      }
    } else {
      # Every point coincides with an already chosen centre, so any row is an
      # equally valid choice.
      pick <- 1L
    }
    centers[k, ] <- x[pick, ]
  }
  centers
}
# Lloyd k-means with 4 clusters, started from the k-means++ style centres
# returned by kpp_init(); the fitted centres are printed afterwards.
clust_kpp <- kmeans(
  x = als_train_scaled,
  centers = kpp_init(als_train_scaled, 4),
  iter.max = 100,
  algorithm = "Lloyd"
)
clust_kpp$centers
## ALSFRS_slope ALT.SGPT._median AST.SGOT._median Creatinine_median
## 1 0.009457390375 -0.1919809585 -0.2330132990 0.415148108699
## 2 -0.118766579920 -0.1053412037 -0.1458557826 -0.105103524451
## 3 -0.009312732085 1.6963808241 1.6327231516 -0.001095948809
## 4 0.021796588621 -0.5112895075 -0.4136034424 -0.549708219017
## Gender_mean Glucose_median Hematocrit_median Platelets_median
## 1 0.74982221394 0.10933449473 0.39810308814 -0.2215399708
## 2 -0.04146929977 -0.53747210963 -3.21995565628 0.3286348126
## 3 0.56943959706 0.02847891749 0.44424299826 -0.2704489901
## 4 -1.30225518785 -0.02740116268 0.06316811015 0.3519608975
## Potassium_max pulse_max respiratory_median
## 1 0.03284406005 -0.07376431826 -0.01153114509
## 2 -0.07953469167 -0.05784627999 0.10745254721
## 3 -0.03577779458 0.04925534086 0.05400532842
## 4 -0.00824139930 0.09405089762 -0.03722160605
# Silhouette of the kpp_init-seeded solution, using the same distance matrix
# as the baseline fit.
sil2 <- silhouette(x = clust_kpp$cluster, dist = dis)
summary(sil2)
## Silhouette of 2223 units in 4 clusters from silhouette.default(x = clust_kpp$cluster, dist = dis) :
## Cluster sizes and average silhouette widths:
## 990 183 338 712
## 0.1451373911 0.2811985903 0.1437985383 0.2080572550
## Individual silhouette widths:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.02363685 0.11655112 0.17906602 0.17628701 0.23928280 0.41810262
plot(sil2, border = NA)

# Average silhouette width for k = 2 .. n_rows clusters, one row per k.
# Row 1 stays NA because the silhouette is not defined for a single cluster
# (a stored 0 would read like a genuine score).
n_rows <- 15
mat <- matrix(NA_real_, nrow = n_rows, ncol = 1,
              dimnames = list(NULL, "Avg_Silhouette_Value"))
# seq_len(...)[-1] is empty when n_rows < 2, whereas 2:n_rows would count down.
for (i in seq_len(n_rows)[-1]) {
  # Kept from the original; note that kpp_init() seeds the RNG itself.
  set.seed(321)
  # Loop-local names, so the earlier `clust_kpp` and `sil` objects are not
  # overwritten by the last iteration.
  fit_i <- kmeans(als_train_scaled, kpp_init(als_train_scaled, i),
                  iter.max = 100, algorithm = "Lloyd")
  sil_i <- silhouette(fit_i$cluster, dis)
  mat[i] <- mean(sil_i[, "sil_width"])
}
mat
## Avg_Silhouette_Value
## [1,] 0.0000000000
## [2,] 0.2357663636
## [3,] 0.1775384456
## [4,] 0.1762870134
## [5,] 0.1642306600
## [6,] 0.1446392470
## [7,] 0.1501165902
## [8,] 0.1468196755
## [9,] 0.1405140565
## [10,] 0.1352907413
## [11,] 0.1307925216
## [12,] 0.1291584508
## [13,] 0.1351560706
## [14,] 0.1283448130
## [15,] 0.1235528266
library(ggplot2)
# Line chart of the average silhouette width against the number of clusters,
# with one x-axis tick per k.
ggplot(
  data = data.frame(k = 2:n_rows, sil = mat[2:n_rows]),
  mapping = aes(x = k, y = sil)
) +
  geom_line() +
  scale_x_continuous(breaks = 2:n_rows)
# Refit with k = 3 clusters using the MacQueen variant of k-means, seeded by
# kpp_init(), and summarise the resulting silhouette.
k <- 3
set.seed(31)
clust_kpp <- kmeans(
  x = als_train_scaled,
  centers = kpp_init(als_train_scaled, k),
  iter.max = 200,
  algorithm = "MacQueen"
)
sil3 <- silhouette(x = clust_kpp$cluster, dist = dis)
summary(sil3)
## Silhouette of 2223 units in 3 clusters from silhouette.default(x = clust_kpp$cluster, dist = dis) :
## Cluster sizes and average silhouette widths:
## 743 183 1297
## 0.2302292899 0.2936640609 0.1309692205
## Individual silhouette widths:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.05753471 0.11170948 0.17908774 0.17753845 0.24177651 0.42254015
plot(sil3, border = NA)
library(cluster)

# Agglomerative hierarchical clustering of the scaled data under three
# linkage rules (single, complete, Ward).
pitch_sing <- agnes(x = als_train_scaled, diss = FALSE, method = "single")
pitch_comp <- agnes(x = als_train_scaled, diss = FALSE, method = "complete")
pitch_ward <- agnes(x = als_train_scaled, diss = FALSE, method = "ward")

# Cut each tree (3, 5 and 4 clusters respectively) and score the cut with the
# shared distance matrix.
sil_sing <- silhouette(x = cutree(pitch_sing, k = 3), dist = dis)
sil_comp <- silhouette(x = cutree(pitch_comp, k = 5), dist = dis)
sil_ward <- silhouette(x = cutree(pitch_ward, k = 4), dist = dis)
library(ggdendro)
# Dendrogram of the Ward tree without any labels, then the mean silhouette
# width (third column of the silhouette matrix) of its 4-cluster cut.
ggdendrogram(
  data = as.dendrogram(pitch_ward),
  labels = FALSE,
  leaf_labels = FALSE
)
mean(sil_ward[, 3])
## [1] 0.1673683107
# Same Ward dendrogram, this time with axis and leaf labels switched on,
# followed by the silhouette summary of its 4-cluster cut.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
ggdendrogram(as.dendrogram(pitch_ward), leaf_labels = TRUE, labels = TRUE,
             size = 10)
summary(sil_ward)
## Silhouette of 2223 units in 4 clusters from silhouette.default(x = cutree(pitch_ward, k = 4), dist = dis) :
## Cluster sizes and average silhouette widths:
## 748 1290 182 3
## 0.2308496789 0.1116674344 0.2920677011 0.7256276452
## Individual silhouette widths:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.17300143 0.09962072 0.16753389 0.16736831 0.23686987 0.79338869
# Silhouette summary for the complete-linkage tree cut into 5 clusters.
summary(sil_comp)
## Silhouette of 2223 units in 5 clusters from silhouette.default(x = cutree(pitch_comp, k = 5), dist = dis) :
## Cluster sizes and average silhouette widths:
## 2184 26 9 3 1
## 0.4175241082 0.4173778610 0.4061845781 0.7265894469 0.0000000000
## Individual silhouette widths:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.3996074 0.3953038 0.4486734 0.4177058 0.4881391 0.7935982
# Silhouette plots for the Ward (k = 4) and complete-linkage (k = 5) cuts.
plot(sil_ward, border = NA)
plot(sil_comp, border = NA)
library(mclust)
## Warning: package 'mclust' was built under R version 3.4.3
## Package 'mclust' version 5.4
## Type 'citation("mclust")' for citing this R package in publications.
# Gaussian mixture clustering with mclust; the summary also prints the fitted
# parameters (mixing probabilities, means, covariance matrices).
# NOTE(review): the model is fitted on `als_train` (original units), while the
# k-means and agnes steps use `als_train_scaled`; confirm this is intentional.
set.seed(1234)
gmm_clust <- Mclust(data = als_train)
summary(object = gmm_clust, parameters = TRUE)
## ----------------------------------------------------
## Gaussian finite mixture model fitted by EM algorithm
## ----------------------------------------------------
##
## Mclust VEV (ellipsoidal, equal shape) model with 3 components:
##
## log.likelihood n df BIC ICL
## -58173.55486 2223 213 -117988.6183 -117989.814
##
## Clustering table:
## 1 2 3
## 226 1275 722
##
## Mixing probabilities:
## 1 2 3
## 0.1018241000 0.5734008286 0.3247750714
##
## Means:
## [,1] [,2] [,3]
## ALSFRS_slope -0.8200197169 -0.7233915218 -0.7081302531
## ALT.SGPT._median 34.0180756411 36.6610138175 26.1963970592
## AST.SGOT._median 29.2855010718 30.8136195739 25.9453358135
## Creatinine_median 64.0195481100 70.5752468653 56.0607124353
## Gender_mean 1.6287909441 2.0000000000 1.0000000000
## Glucose_median 6.0075219653 5.4598280770 5.3721272527
## Hematocrit_median 8.6553093852 44.5174123224 40.2111548035
## Platelets_median 251.8749017867 227.2257023774 255.0788761990
## Potassium_max 5.0408541388 4.5798696172 4.5846671539
## pulse_max 90.6906433320 90.0634759858 91.6493071585
## respiratory_median 3.6725058634 3.5914645491 3.5697604768
##
## Variances:
## [,,1]
## ALSFRS_slope ALT.SGPT._median AST.SGOT._median
## ALSFRS_slope 8.81446348733 -28.24232414 -16.8418432032
## ALT.SGPT._median -28.24232414086 8255.37291299 4301.0002556866
## AST.SGOT._median -16.84184320320 4301.00025569 3421.1883657596
## Creatinine_median 70.35851689487 206.22510911 365.2892940583
## Gender_mean 2.16258101758 16.62574552 0.9634979045
## Glucose_median -0.06888216003 56.97903467 9.9591992504
## Hematocrit_median -8.74610697374 2193.24451046 1291.8107850543
## Platelets_median -54.78189622066 -5567.82214450 -4028.0135003464
## Potassium_max 0.53610474442 -47.85912967 48.3685668793
## pulse_max -4.47692229756 175.26341695 204.5793040095
## respiratory_median 4.06177432780 -29.24132195 -1.9115266797
## Creatinine_median Gender_mean Glucose_median
## ALSFRS_slope 70.35851689 2.1625810176 -0.06888216003
## ALT.SGPT._median 206.22510911 16.6257455225 56.97903466993
## AST.SGOT._median 365.28929406 0.9634979045 9.95919925038
## Creatinine_median 8776.75677163 136.1754955021 98.09823125801
## Gender_mean 136.17549550 2.5745118523 2.96317935000
## Glucose_median 98.09823126 2.9631793500 156.17887078155
## Hematocrit_median 1091.80084023 21.7941051956 891.86868792555
## Platelets_median -6166.10771507 -205.3216056384 -754.81445506648
## Potassium_max 144.69717674 1.3595727903 59.15896136691
## pulse_max -46.04724075 -1.4515261409 64.28106396749
## respiratory_median 40.20605100 1.3561763981 2.17583698867
## Hematocrit_median Platelets_median Potassium_max
## ALSFRS_slope -8.746106974 -54.78189622 0.5361047444
## ALT.SGPT._median 2193.244510459 -5567.82214450 -47.8591296738
## AST.SGOT._median 1291.810785054 -4028.01350035 48.3685668793
## Creatinine_median 1091.800840231 -6166.10771507 144.6971767423
## Gender_mean 21.794105196 -205.32160564 1.3595727903
## Glucose_median 891.868687926 -754.81445507 59.1589613669
## Hematocrit_median 6229.369699490 -5082.06277750 398.0734119160
## Platelets_median -5082.062777503 97935.59927076 89.9066968249
## Potassium_max 398.073411916 89.90669682 64.3793847232
## pulse_max 518.311213389 -435.67026146 4.5813653115
## respiratory_median 11.293020065 -24.97913353 2.7420579015
## pulse_max respiratory_median
## ALSFRS_slope -4.476922298 4.061774328
## ALT.SGPT._median 175.263416946 -29.241321950
## AST.SGOT._median 204.579304009 -1.911526680
## Creatinine_median -46.047240747 40.206051004
## Gender_mean -1.451526141 1.356176398
## Glucose_median 64.281063967 2.175836989
## Hematocrit_median 518.311213389 11.293020065
## Platelets_median -435.670261455 -24.979133526
## Potassium_max 4.581365311 2.742057902
## pulse_max 302.948544903 -0.193968603
## respiratory_median -0.193968603 8.768682106
## [,,2]
## ALSFRS_slope
## ALSFRS_slope 0.376472764132785742141606988
## ALT.SGPT._median -0.195049753309820855795564398
## AST.SGOT._median 0.286927996220803394056275692
## Creatinine_median 0.670915842911480986288097483
## Gender_mean 0.000000000000000005310160414
## Glucose_median -0.025287237784790771166765211
## Hematocrit_median -0.010308991321030799154234714
## Platelets_median -1.617280711064517761954562047
## Potassium_max 0.002372327993399506347593775
## pulse_max -1.196659541573328544572518695
## respiratory_median 0.062912502190373198462935989
## ALT.SGPT._median
## ALSFRS_slope -0.195049753309820855795564
## ALT.SGPT._median 202.357485555664993626123760
## AST.SGOT._median 96.055006866366753115471511
## Creatinine_median -28.414437234828870515457311
## Gender_mean 0.000000000000008705617514
## Glucose_median 0.052307030710034875531367
## Hematocrit_median 6.582718204898245772938026
## Platelets_median -8.412778385798551639140896
## Potassium_max -0.101923720523368677159937
## pulse_max 9.097337487621983243002433
## respiratory_median 0.097542272362888454706464
## AST.SGOT._median
## ALSFRS_slope 0.286927996220803394056276
## ALT.SGPT._median 96.055006866366753115471511
## AST.SGOT._median 74.172736903935714281033142
## Creatinine_median -8.767553600938230218275748
## Gender_mean 0.000000000000001504849454
## Glucose_median -0.190870574473019360972614
## Hematocrit_median 2.210007362277997433608334
## Platelets_median -17.945234692036130752512690
## Potassium_max 0.068284452119214966714367
## pulse_max 0.740776803389732507731935
## respiratory_median 0.209070382447014452287348
## Creatinine_median
## ALSFRS_slope 0.67091584291148098628810
## ALT.SGPT._median -28.41443723482887051545731
## AST.SGOT._median -8.76755360093823021827575
## Creatinine_median 233.85971614495224457641598
## Gender_mean 0.00000000000000243141532
## Glucose_median -0.31915666464636965882207
## Hematocrit_median -2.78567192497759119618195
## Platelets_median -95.30468321024039823896601
## Potassium_max 0.31561126500699682173590
## pulse_max -16.67681038229739343137226
## respiratory_median 0.50282267143173320356198
## Gender_mean
## ALSFRS_slope 0.000000000000000005310160414
## ALT.SGPT._median 0.000000000000008705617513923
## AST.SGOT._median 0.000000000000001504849454444
## Creatinine_median 0.000000000000002431415320339
## Gender_mean 0.000365441381557981756749570
## Glucose_median 0.000000000000000035322538160
## Hematocrit_median 0.000000000000000767309356685
## Platelets_median 0.000000000000000128796424816
## Potassium_max -0.000000000000000023210580728
## pulse_max 0.000000000000000032059726312
## respiratory_median 0.000000000000000019307826839
## Glucose_median
## ALSFRS_slope -0.02528723778479077116676521
## ALT.SGPT._median 0.05230703071003487553136679
## AST.SGOT._median -0.19087057447301936097261432
## Creatinine_median -0.31915666464636965882206709
## Gender_mean 0.00000000000000003532253816
## Glucose_median 0.60763800834882908397105439
## Hematocrit_median 0.04335046886737588950344602
## Platelets_median -3.51721365349072456751855498
## Potassium_max -0.00648723166705809321103127
## pulse_max 1.18675900282751034531258938
## respiratory_median 0.01496097148940514140469382
## Hematocrit_median
## ALSFRS_slope -0.0103089913210307991542347
## ALT.SGPT._median 6.5827182048982457729380258
## AST.SGOT._median 2.2100073622779974336083342
## Creatinine_median -2.7856719249775911961819475
## Gender_mean 0.0000000000000007673093567
## Glucose_median 0.0433504688673758895034460
## Hematocrit_median 6.5576430136922230218488039
## Platelets_median -1.6868551457926668213360699
## Potassium_max 0.0080328509520460160409083
## pulse_max 4.2013260636575582296359244
## respiratory_median 0.0129731920394417291064748
## Platelets_median
## ALSFRS_slope -1.6172807110645177619545620
## ALT.SGPT._median -8.4127783857985516391408964
## AST.SGOT._median -17.9452346920361307525126904
## Creatinine_median -95.3046832102403982389660086
## Gender_mean 0.0000000000000001287964248
## Glucose_median -3.5172136534907245675185550
## Hematocrit_median -1.6868551457926668213360699
## Platelets_median 2446.4240401498409482883289456
## Potassium_max 1.7542246401960310819845290
## pulse_max 50.4675895256150113254989265
## respiratory_median -2.3694731052542983285036371
## Potassium_max
## ALSFRS_slope 0.00237232799339950634759377
## ALT.SGPT._median -0.10192372052336867715993662
## AST.SGOT._median 0.06828445211921496671436671
## Creatinine_median 0.31561126500699682173589622
## Gender_mean -0.00000000000000002321058073
## Glucose_median -0.00648723166705809321103127
## Hematocrit_median 0.00803285095204601604090833
## Platelets_median 1.75422464019603108198452901
## Potassium_max 0.11407146124812245213675510
## pulse_max -0.15434573228099737751684017
## respiratory_median -0.00387037962550750317888837
## pulse_max
## ALSFRS_slope -1.19665954157332854457251869
## ALT.SGPT._median 9.09733748762198324300243257
## AST.SGOT._median 0.74077680338973250773193513
## Creatinine_median -16.67681038229739343137225660
## Gender_mean 0.00000000000000003205972631
## Glucose_median 1.18675900282751034531258938
## Hematocrit_median 4.20132606365755822963592436
## Platelets_median 50.46758952561501132549892645
## Potassium_max -0.15434573228099737751684017
## pulse_max 128.29496776746964314952492714
## respiratory_median -1.23135434060060711125572652
## respiratory_median
## ALSFRS_slope 0.06291250219037319846293599
## ALT.SGPT._median 0.09754227236288845470646436
## AST.SGOT._median 0.20907038244701445228734826
## Creatinine_median 0.50282267143173320356197564
## Gender_mean 0.00000000000000001930782684
## Glucose_median 0.01496097148940514140469382
## Hematocrit_median 0.01297319203944172910647481
## Platelets_median -2.36947310525429832850363709
## Potassium_max -0.00387037962550750317888837
## pulse_max -1.23135434060060711125572652
## respiratory_median 0.35147975339418685569725653
## [,,3]
## ALSFRS_slope
## ALSFRS_slope 0.304081458226215184392771107
## ALT.SGPT._median 0.234412669642543647352539438
## AST.SGOT._median 0.193020337758060711585983427
## Creatinine_median -0.740197291492217246400286967
## Gender_mean 0.000000000000000001225124671
## Glucose_median 0.014782242817255207209536927
## Hematocrit_median -0.060443858943519057635995750
## Platelets_median -3.116797794358534545722250186
## Potassium_max -0.001829526092394927801881854
## pulse_max -0.839910176306504663301666369
## respiratory_median 0.048325942064214484628070778
## ALT.SGPT._median
## ALSFRS_slope 0.234412669642543647352539
## ALT.SGPT._median 135.474908403300446479988750
## AST.SGOT._median 72.656840023400448558277276
## Creatinine_median -19.373117660611168133755200
## Gender_mean -0.000000000000004447763054
## Glucose_median 0.622806710666166329914972
## Hematocrit_median 7.115725667466564097196624
## Platelets_median -0.813157613059962591783858
## Potassium_max 0.178663032637895374810100
## pulse_max 12.649822796825857196267862
## respiratory_median 0.116232302891555394630529
## AST.SGOT._median
## ALSFRS_slope 0.193020337758060711585983
## ALT.SGPT._median 72.656840023400448558277276
## AST.SGOT._median 65.066001558409496396961913
## Creatinine_median 1.915911535218972394645220
## Gender_mean -0.000000000000004629277048
## Glucose_median -0.141318901761545379081042
## Hematocrit_median 2.196345620164208423119589
## Platelets_median -10.640663047084409242870606
## Potassium_max 0.363836054737316449969597
## pulse_max 4.843851142459453029687211
## respiratory_median 0.110633581935144054142484
## Creatinine_median
## ALSFRS_slope -0.74019729149221724640029
## ALT.SGPT._median -19.37311766061116813375520
## AST.SGOT._median 1.91591153521897239464522
## Creatinine_median 222.60345967844509118549468
## Gender_mean -0.00000000000000151091172
## Glucose_median 0.79891243949218226916287
## Hematocrit_median -3.60340234708443452049664
## Platelets_median 2.66250906570993439714812
## Potassium_max 0.35003312792809304632158
## pulse_max -17.20085119214901325790379
## respiratory_median -0.24111958543804537513644
## Gender_mean
## ALSFRS_slope 0.000000000000000001225124671
## ALT.SGPT._median -0.000000000000004447763054306
## AST.SGOT._median -0.000000000000004629277048158
## Creatinine_median -0.000000000000001510911720390
## Gender_mean 0.000307497096843337169337629
## Glucose_median 0.000000000000000002863196499
## Hematocrit_median 0.000000000000000783253585155
## Platelets_median -0.000000000000000007925352988
## Potassium_max -0.000000000000000036843116365
## pulse_max -0.000000000000000895221994002
## respiratory_median -0.000000000000000031473067338
## Glucose_median
## ALSFRS_slope 0.014782242817255207209536927
## ALT.SGPT._median 0.622806710666166329914972266
## AST.SGOT._median -0.141318901761545379081042029
## Creatinine_median 0.798912439492182269162867669
## Gender_mean 0.000000000000000002863196499
## Glucose_median 0.516966007481704958870238897
## Hematocrit_median 0.101845430481395063382343835
## Platelets_median 0.531847043211883696578468061
## Potassium_max 0.003227571694437999984955701
## pulse_max 0.817787168323164115335544011
## respiratory_median 0.001169710935042057257243764
## Hematocrit_median
## ALSFRS_slope -0.0604438589435190576359958
## ALT.SGPT._median 7.1157256674665640971966241
## AST.SGOT._median 2.1963456201642084231195895
## Creatinine_median -3.6034023470844345204966430
## Gender_mean 0.0000000000000007832535852
## Glucose_median 0.1018454304813950633823438
## Hematocrit_median 5.8219494780370304454208963
## Platelets_median -2.4276419537058746556112965
## Potassium_max 0.0390729567332353464048644
## pulse_max 3.6913836568552138572840704
## respiratory_median 0.0887206109169108858569786
## Platelets_median
## ALSFRS_slope -3.116797794358534545722250186
## ALT.SGPT._median -0.813157613059962591783857988
## AST.SGOT._median -10.640663047084409242870606249
## Creatinine_median 2.662509065709934397148117569
## Gender_mean -0.000000000000000007925352988
## Glucose_median 0.531847043211883696578468061
## Hematocrit_median -2.427641953705874655611296475
## Platelets_median 2062.349535565139376558363437653
## Potassium_max 0.377897998245023181151935887
## pulse_max 35.296073648026606406347127631
## respiratory_median -0.486888312603901973751874266
## Potassium_max
## ALSFRS_slope -0.00182952609239492780188185
## ALT.SGPT._median 0.17866303263789537481009972
## AST.SGOT._median 0.36383605473731644996959744
## Creatinine_median 0.35003312792809304632157819
## Gender_mean -0.00000000000000003684311637
## Glucose_median 0.00322757169443799998495570
## Hematocrit_median 0.03907295673323534640486443
## Platelets_median 0.37789799824502318115193589
## Potassium_max 0.09781068677470353134317804
## pulse_max 0.18589254967795756923187867
## respiratory_median -0.00023819356389362129206022
## pulse_max
## ALSFRS_slope -0.839910176306504663301666
## ALT.SGPT._median 12.649822796825857196267862
## AST.SGOT._median 4.843851142459453029687211
## Creatinine_median -17.200851192149013257903789
## Gender_mean -0.000000000000000895221994
## Glucose_median 0.817787168323164115335544
## Hematocrit_median 3.691383656855213857284070
## Platelets_median 35.296073648026606406347128
## Potassium_max 0.185892549677957569231879
## pulse_max 110.134389147179049928126915
## respiratory_median -0.494701302870838632852468
## respiratory_median
## ALSFRS_slope 0.04832594206421448462807078
## ALT.SGPT._median 0.11623230289155539463052946
## AST.SGOT._median 0.11063358193514405414248358
## Creatinine_median -0.24111958543804537513643993
## Gender_mean -0.00000000000000003147306734
## Glucose_median 0.00116971093504205725724376
## Hematocrit_median 0.08872061091691088585697855
## Platelets_median -0.48688831260390197375187427
## Potassium_max -0.00023819356389362129206022
## pulse_max -0.49470130287083863285246821
## respiratory_median 0.30348941115544664226533200
# Name of the mixture model stored on the fitted gmm_clust object.
gmm_clust$modelName
## [1] "VEV"
# BIC values of the fitted models; legend placed at the bottom in two columns.
plot(gmm_clust$BIC, legendArgs = list(x = "bottom", ncol = 2, cex = 1))
# Standard mclust views of the fitted mixture.
plot(gmm_clust, what = "density")
plot(gmm_clust, what = "classification")
# Uncertainty plots on selected variable pairs. `dimens` indexes the
# clustering variables in the order they are printed in the summaries:
# 1 = ALSFRS_slope, 2 = ALT.SGPT._median, 4 = Creatinine_median.
plot(gmm_clust, what = "uncertainty", dimens = c(2, 1), main = "ALSFRS Slope vs. SGPT ")
# The title previously repeated "SGPT" here although variable 4 is creatinine.
plot(gmm_clust, what = "uncertainty", dimens = c(4, 1), main = "ALSFRS Slope vs. Creatinine")
# Dimension reduction for the fitted mixture, followed by its printed summary.
gmm_clustDR <- MclustDR(gmm_clust, lambda = 1)
summary(gmm_clustDR)
## -----------------------------------------------------------------
## Dimension reduction for model-based clustering and classification
## -----------------------------------------------------------------
##
## Mixture model type: Mclust (VEV, 3)
##
## Clusters n
## 1 226
## 2 1275
## 3 722
##
## Estimated basis vectors:
## Dir1 Dir2
## ALSFRS_slope -0.016637210204 -0.0533346714
## ALT.SGPT._median -0.000575165975 0.0068095316
## AST.SGOT._median 0.001041774565 0.0017412605
## Creatinine_median -0.000412172210 -0.0005411544
## Gender_mean 0.998910036674 0.9441136589
## Glucose_median -0.037348371581 0.2876512773
## Hematocrit_median 0.010796483936 -0.0973572505
## Platelets_median 0.000005029877 0.0000336890
## Potassium_max -0.017886088667 0.1153847314
## pulse_max -0.000672512832 0.0007270506
## respiratory_median -0.008278072313 0.0148074238
##
## Dir1 Dir2
## Eigenvalues 1.627633 1.458155
## Cum. % 52.746112 100.000000
# Views of the dimension-reduced fit, drawn in this order:
# "boundaries" (evaluated on a 200-point grid), then "pairs", then "scatterplot".
plot(
  x = gmm_clustDR,
  what = "boundaries",
  ngrid = 200
)
plot(x = gmm_clustDR, what = "pairs")
plot(x = gmm_clustDR, what = "scatterplot")
### The above graph shows three different clusters in separate Gaussian planes, which signify the different patient phenotypes that can be automatically and reliably identified and used to predict the change of the ALSFRS slope over time.